Reads were first de-multiplexed with ea-utils (fastq-multx) with an allowed barcode mismatch of 1 (Default). De-multiplexed reads were then subjected to an alignment against the UniVec vector contamination database using the adapter removal tool within ea-utils (fastq-mcf). Barcodes were extracted by remove the last 12 nucleotides from each read.
Next reads were subjected to an alignment against the Oligo library using a DNA aligner. We used Bowtie2…
We are going to first import the merged counts tables that is outputted from the Nextflow pipeline. This table already has the raw counts summarised from the alignments and merged into a single matrix.
# Import counts
cd8.counts = readRDS('../data/CD8-counts.rda')
# Convert to long format
cd8.counts.long = cd8.counts %>%
rownames_to_column("Id") %>%
pivot_longer(cols = -c('Id'), names_to = 'sample', values_to = 'counts')
# Import CD4 metadata
cd8.metadata = readRDS('../data/CD8-metadata.rda')
# Import MultiQC results
cd8.report = readRDS('../data/CD8-multiqc.rda')
# Import oligo annotations
oligo.genes = readRDS('../data/oligo-genes.rda')
# Mean
mean(cd8.report$raw_total_sequences)
## [1] 14162045
# Median
median(cd8.report$raw_total_sequences)
## [1] 13417612
# Range
range(cd8.report$raw_total_sequences)
## [1] 8434046 23626015
cd8.report %>%
select(Sample, reads_unmapped, reads_mapped) %>%
mutate(Sample = str_replace(Sample, '-bamstats', '')) %>%
pivot_longer(cols = c("reads_unmapped", "reads_mapped"), names_to = "feature", values_to = "reads") %>%
ggplot(aes(x = Sample, y = reads, fill = feature)) +
geom_col() +
coord_flip() +
theme_light(base_size = 11) +
theme(legend.position = "top") +
scale_fill_aaas() +
xlab("") +
ylab("Number of reads")
cd8.report %>%
select(Sample, reads_unmapped_percent, reads_mapped_percent) %>%
mutate(Sample = str_replace(Sample, '-bamstats', '')) %>%
pivot_longer(cols = c("reads_unmapped_percent", "reads_mapped_percent"), names_to = "feature", values_to = "reads") %>%
mutate(reads = reads / 100) %>%
ggplot(aes(x = Sample, y = reads, fill = feature)) +
geom_col() +
coord_flip() +
theme_light(base_size = 11) +
theme(legend.position = "top") +
scale_fill_aaas() +
scale_y_continuous(labels = scales::percent) +
xlab("") +
ylab("Percent of reads")
cd8.counts %>%
ggplot(., aes(x = log10(mock_a), y = log10(mock_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("Mock replicate #1 (log10)") +
ylab("Mock replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
cd8.counts %>%
ggplot(aes(x = log10(id3_a), y = log10(id3_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1D3 replicate #1 (log10)") +
ylab("1D3 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
cd8.counts %>%
ggplot(aes(x = log10(cdk453_a), y = log10(cdk453_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("CDK4/53 replicate #1 (log10)") +
ylab("CDK4/53 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
cd8.counts %>%
ggplot(aes(x = log10(d1_10_a), y = log10(d1_10_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:10 replicate #1 (log10)") +
ylab("1:10 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
cd8.counts %>%
ggplot(aes(x = log10(d1_100_a), y = log10(d1_100_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:100 replicate #1 (log10)") +
ylab("1:100 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
cd8.counts %>%
ggplot(aes(x = log10(d1_300_a), y = log10(d1_300_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:300 replicate #1 (log10)") +
ylab("1:300 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
cd8.counts %>%
ggplot(aes(x = log10(d1_1000_a), y = log10(d1_1000_b))) +
geom_point(colour = alpha("grey", 0.7)) +
xlab("1:1000 replicate #1 (log10)") +
ylab("1:1000 replicate #2 (log10)") +
theme_light(base_size = 11) +
stat_cor(method = "pearson", aes(label = ..r.label..))
# Calculate the variance between oligo-replicates
oligo.variance = cd8.counts %>%
rownames_to_column("Id") %>%
mutate_if(is.numeric, funs(log10)) %>%
left_join(oligo.genes, by = "Id") %>%
group_by(oligoGroup) %>%
summarise(mock_a = diff(mock_a),
mock_b = diff(mock_b),
cdk453_a = diff(cdk453_a),
cdk453_b = diff(cdk453_b),
id3_a = diff(id3_a),
id3_b = diff(id3_b),
d1_10_a = diff(d1_10_a),
d1_10_b = diff(d1_10_b),
d1_100_a = diff(d1_100_a),
d1_100_b = diff(d1_100_b),
d1_300_a = diff(d1_300_a),
d1_300_b = diff(d1_300_b),
d1_1000_a = diff(d1_1000_a),
d1_1000_b = diff(d1_1000_b))
# Plot histogram of variance across samples
oligo.variance %>%
pivot_longer(-oligoGroup, names_to = "Sample", values_to = "difference") %>%
ggplot(aes(x = difference)) +
geom_histogram(bins = 100) +
theme_bw() +
facet_wrap(.~Sample) +
xlab("Difference (log10)")
# Match the sample ID's between samples
cd8.counts = cd8.counts[,match(cd8.metadata$Sample, colnames(cd8.counts))]
# Make DEseq2 object
ddsMat.cd8 <- DESeqDataSetFromMatrix(countData = cd8.counts,
colData = cd8.metadata,
design = ~Group)
## converting counts to integer mode
# Get normalize counts
ddsMat.cd8 <- estimateSizeFactors(ddsMat.cd8)
ddsMat.cd8 <- estimateDispersions(ddsMat.cd8, fitType='local')
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
# Get normalized counts
cd8.norm <- counts(ddsMat.cd8, normalized = TRUE) %>%
as.data.frame() %>%
rownames_to_column("Id")
# Write tables to disk
write_tsv(cd8.counts, "tables/cd8-counts.tsv")
write_tsv(cd8.norm, "tables/cd8-counts-norm.tsv")
# Create a long table
cd8.norm.long = cd8.norm %>%
gather(sample, norm.counts, -Id)
# Pre-normalized
cd8.counts.long %>%
ggplot(aes(x = sample, y = log10(counts + 1))) +
geom_boxplot() +
theme_light(base_size = 11) +
theme(axis.text.x = element_text(angle = 90)) +
xlab("") +
ylab("Counts (log10)") +
ggtitle('Un-normalized data')
# After normalized
cd8.norm.long %>%
ggplot(aes(x = sample, y = log10(norm.counts + 1))) +
geom_boxplot() +
theme_light(base_size = 11) +
theme(axis.text.x = element_text(angle = 90)) +
xlab("") +
ylab("Normalized counts (log10)") +
ggtitle('Median-normalized data')
cd8.norm.long %>%
ggplot(aes(x = log10(norm.counts))) +
geom_histogram(aes(y=..density..), binwidth=0.25, colour="black", fill="white") +
geom_density(alpha = 0.2, fill = "#FF6666") +
facet_wrap(sample~.) +
theme_light(base_size = 11) +
ylab("Density") +
xlab("Normalized reads per oligo (log10)")
Oligo Id’s for the positive control dropout’s are: - oligo_1899 (MART1_ELA) - oligo_4281 (MART1_ELA) - oligo_912 (MART1) - oligo_3294 (MART1)
# Get average between the replicates
id3.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
ID3_mean = rowMeans(select(., id3_a, id3_b))) %>%
mutate(M = log2(ID3_mean / Mock_mean),
A = (log2(ID3_mean) + log2(Mock_mean)) / 2)
# Plot scatterplot
id3.mean %>%
ggplot(aes(x = log10(Mock_mean), y = log10(ID3_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(id3.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(id3.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(id3.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(id3.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("Mock normalized counts (log10)") +
ylab("1D3 normalized counts (log10)")
# Plot MA plot
id3.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(id3.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(id3.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(id3.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(id3.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " 1D3/Mock)")))
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "id3_a", "id3_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (ID3 / Mock)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4762 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 5, 0.1%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
id3.mock.res = results(ddsMat.subset, alpha = 0.05)
# View table
id3.mock.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_1899 1616.221 -2.5925091 0.1668882 -15.534411 2.029294e-54
## 2 oligo_3295 2359.897 -1.1754412 0.1121242 -10.483386 1.029893e-25
## 3 oligo_4281 1750.376 -4.1132380 0.1543848 -26.642770 2.170495e-156
## 4 oligo_4374 1994.070 -0.5718696 0.1206310 -4.740650 2.130336e-06
## 5 oligo_913 2108.672 -0.6154804 0.1159946 -5.306114 1.119872e-07
## padj
## 1 4.831750e-51
## 2 1.634784e-22
## 3 1.033590e-152
## 4 2.028932e-03
## 5 1.333208e-04
# Show quick plot of significance ()
DESeq2::plotMA(id3.mock.res, ylim = c(-3, 3))
Oligo Id’s for the positive control dropout’s are: - oligo_1899 (MART1_ELA) - oligo_4281 (MART1_ELA) - oligo_912 (MART1) - oligo_3294 (MART1)
# Get average between the replicates
cdk4.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
CDK4_mean = rowMeans(select(., cdk453_a, cdk453_b))) %>%
mutate(M = log2(CDK4_mean / Mock_mean),
A = (log2(CDK4_mean) + log2(Mock_mean)) / 2)
# Plot scatterplot
cdk4.mean %>%
ggplot(aes(x = log10(Mock_mean), y = log10(CDK4_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("Mock normalized counts (log10)") +
ylab("CDK4 normalized counts (log10)")
# Plot MA plot
cdk4.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(cdk4.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " CDK4/Mock)")))
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "cdk453_a", "cdk453_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (CDK4 / Mock)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4759 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 3, 0.063%
## LFC < 0 (down) : 4, 0.084%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
cdk4.mock.res = results(ddsMat.subset, alpha = 0.05)
# View table
cdk4.mock.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_1887 772.9214 -2.8901569 0.21330745 -13.549254 8.003909e-42
## 2 oligo_1895 6742.4730 -0.3312007 0.07032309 -4.709700 2.480816e-06
## 3 oligo_199 1372.7466 0.7148753 0.15410883 4.638769 3.504898e-06
## 4 oligo_3925 7876.4527 0.2904682 0.06628197 4.382311 1.174271e-05
## 5 oligo_4269 620.7388 -4.4225899 0.32319890 -13.683803 1.268798e-42
## 6 oligo_4311 8468.5055 0.2476976 0.05916615 4.186475 2.833199e-05
## 7 oligo_4374 1934.2865 -0.6837166 0.12417351 -5.506139 3.667891e-08
## padj
## 1 1.904530e-38
## 2 2.951551e-03
## 3 3.335962e-03
## 4 9.313924e-03
## 5 6.038211e-39
## 6 1.926170e-02
## 7 5.818497e-05
# Show quick plot of significance ()
DESeq2::plotMA(cdk4.mock.res)
Oligo Id’s for the positive control dropout’s are: - oligo_1899 (MART1_ELA) - oligo_4281 (MART1_ELA) - oligo_912 (MART1) - oligo_3294 (MART1)
# Get average between the replicates
id3.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
CDK4_mean = rowMeans(select(., cdk453_a, cdk453_b)),
ID3_mean = rowMeans(select(., id3_a, id3_b))) %>%
mutate(M = log2(ID3_mean / CDK4_mean),
A = (log2(ID3_mean) + log2(CDK4_mean)) / 2)
# Plot scatterplot
id3.mean %>%
ggplot(aes(x = log10(CDK4_mean), y = log10(ID3_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(id3.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(id3.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(id3.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(id3.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("CDK4 normalized counts (log10)") +
ylab("1D3 normalized counts (log10)")
# Plot MA plot
id3.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(id3.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(id3.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(id3.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(id3.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " 1D3/CDK4)")))
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("cdk453_a", "cdk453_b", "id3_a", "id3_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "CDK4")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (ID3 / CDK4)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4750 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 4, 0.084%
## LFC < 0 (down) : 5, 0.11%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
id3.cdk4.res = results(ddsMat.subset, alpha = 0.05)
# View table
id3.cdk4.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_1887 804.7891 2.9557084 0.19686979 15.013520 5.988381e-51
## 2 oligo_1895 6743.7982 0.3322023 0.06410123 5.182463 2.189751e-07
## 3 oligo_1899 1499.7890 -2.4666321 0.16309495 -15.123902 1.126636e-51
## 4 oligo_2645 2861.1253 -0.4344652 0.10762636 -4.036792 5.418701e-05
## 5 oligo_3295 2439.4898 -1.2441102 0.11006520 -11.303393 1.262446e-29
## 6 oligo_3425 2457.9234 0.4598083 0.10624364 4.327866 1.505608e-05
## 7 oligo_4269 730.6316 4.6676557 0.27483605 16.983419 1.089508e-64
## 8 oligo_4281 1793.0922 -4.1501970 0.15374555 -26.993932 1.741353e-160
## 9 oligo_913 2207.1302 -0.7225103 0.10961534 -6.591325 4.359196e-11
## padj
## 1 7.111203e-48
## 2 1.485903e-04
## 3 1.783841e-48
## 4 2.859870e-02
## 5 1.199324e-26
## 6 8.939545e-03
## 7 2.587582e-61
## 8 8.271426e-157
## 9 3.451030e-08
# Show quick plot of significance ()
DESeq2::plotMA(id3.cdk4.res)
# Get average between the replicates
d10.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D10_mean = rowMeans(select(., d1_10_a, d1_10_b))) %>%
mutate(M = log2(D10_mean / Mock_mean),
A = (log2(D10_mean) + log2(Mock_mean)) / 2)
# Plot scatterplot
d10.scatter = d10.mean %>%
ggplot(aes(x = log10(Mock_mean), y = log10(D10_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(d10.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d10.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d10.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d10.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("Mock normalized counts (log10)") +
ylab("D10 normalized counts (log10)")
d10.scatter
# Plot MA plot
d10.ma = d10.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(d10.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d10.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d10.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d10.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
ylim(-5, 5) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " D10/Mock)")))
d10.ma
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_10_a", "d1_10_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (D10 / Mock)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4759 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 7, 0.15%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d10.mock.res = results(ddsMat.subset, alpha = 0.05)
# View table
d10.mock.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_1887 800.8005 -2.5072726 0.2217680 -11.305835 1.227801e-29
## 2 oligo_1895 6718.5792 -0.3428076 0.0764119 -4.486312 7.246673e-06
## 3 oligo_1899 1621.4895 -2.5555430 0.1588604 -16.086722 3.161408e-58
## 4 oligo_3295 2611.2699 -0.7461890 0.1170938 -6.372577 1.858780e-10
## 5 oligo_3425 2275.5100 -0.5964802 0.1221796 -4.881996 1.050174e-06
## 6 oligo_4269 624.8460 -4.2270058 0.2828515 -14.944259 1.697680e-50
## 7 oligo_4281 1735.4737 -4.3506053 0.1653703 -26.308258 1.542788e-152
## padj
## 1 1.460776e-26
## 2 4.926702e-03
## 3 7.522572e-55
## 4 1.769187e-07
## 5 8.329633e-04
## 6 2.693086e-47
## 7 7.342126e-149
# Show quick plot of significance ()
DESeq2::plotMA(d10.mock.res)
# Get average between the replicates
d100.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D100_mean = rowMeans(select(., d1_100_a, d1_100_b))) %>%
mutate(M = log2(D100_mean / Mock_mean),
A = (log2(D100_mean) + log2(Mock_mean)) / 2)
# Plot scatterplot
d100.scatter = d100.mean %>%
ggplot(aes(x = log10(Mock_mean), y = log10(D100_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(d100.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d100.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d100.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d100.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("Mock normalized counts (log10)") +
ylab("D100 normalized counts (log10)")
d100.scatter
# Plot MA plot
d100.ma = d100.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(d100.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d100.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d100.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d100.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
ylim(-5, 5) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " D100/Mock)")))
d100.ma
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_100_a", "d1_100_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
# Get results (D100 / Mock)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4763 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 9, 0.19%
## LFC < 0 (down) : 13, 0.27%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d100.mock.res = results(ddsMat.subset, alpha = 0.05)
# View table
d100.mock.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_1160 5517.9690 0.2744181 0.07135514 3.845807 1.201561e-04
## 2 oligo_1202 5002.0315 -0.2653457 0.06468634 -4.102036 4.095311e-05
## 3 oligo_1290 652.8729 0.7138063 0.18876227 3.781509 1.558804e-04
## 4 oligo_1416 2380.3230 -0.3554626 0.09489771 -3.745744 1.798597e-04
## 5 oligo_1608 1771.1818 -0.4636330 0.11913279 -3.891733 9.953072e-05
## 6 oligo_1787 3589.1703 -0.3045255 0.07448680 -4.088315 4.345174e-05
## 7 oligo_1887 837.8370 -2.1215428 0.18840763 -11.260387 2.058525e-29
## 8 oligo_1895 6822.2782 -0.2933034 0.06484763 -4.522962 6.098009e-06
## 9 oligo_1899 1685.6399 -2.2079337 0.12426590 -17.767816 1.254810e-70
## 10 oligo_199 1291.5294 0.5702755 0.13792906 4.134557 3.556401e-05
## 11 oligo_2684 2278.7900 -0.4067036 0.09600833 -4.236128 2.274074e-05
## 12 oligo_2837 5393.8335 0.3333689 0.07519533 4.433373 9.277030e-06
## 13 oligo_3340 2039.9874 0.4099799 0.10343249 3.963744 7.378337e-05
## 14 oligo_3383 1962.8270 -0.4430616 0.11081468 -3.998221 6.382037e-05
## 15 oligo_4071 1726.9442 -0.4747199 0.11520062 -4.120810 3.775425e-05
## 16 oligo_4269 716.7704 -2.2632358 0.53496867 -4.230595 2.330739e-05
## 17 oligo_4281 1916.8408 -2.6562865 0.11289935 -23.527917 2.113133e-122
## 18 oligo_4509 2842.9843 0.3258153 0.08605918 3.785945 1.531255e-04
## 19 oligo_4750 3009.7909 0.3492666 0.08266609 4.225029 2.389105e-05
## 20 oligo_504 2322.6317 0.4146858 0.09744284 4.255682 2.084125e-05
## 21 oligo_622 3325.7133 -0.2936877 0.07720321 -3.804086 1.423284e-04
## 22 oligo_779 2795.2172 0.3861536 0.09520987 4.055815 4.995977e-05
## padj
## 1 3.179465e-02
## 2 1.592005e-02
## 3 3.535516e-02
## 4 3.893963e-02
## 5 2.788617e-02
## 6 1.592005e-02
## 7 3.268252e-26
## 8 7.261204e-03
## 9 2.988329e-67
## 10 1.592005e-02
## 11 1.264367e-02
## 12 8.837299e-03
## 13 2.196439e-02
## 14 2.026509e-02
## 15 1.592005e-02
## 16 1.264367e-02
## 17 1.006485e-118
## 18 3.535516e-02
## 19 1.264367e-02
## 20 1.264367e-02
## 21 3.535516e-02
## 22 1.699703e-02
# Show quick plot of significance ()
DESeq2::plotMA(d100.mock.res)
# Get average between the replicates
d300.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D300_mean = rowMeans(select(., d1_300_a, d1_300_b))) %>%
mutate(M = log2(D300_mean / Mock_mean),
A = (log2(D300_mean) + log2(Mock_mean)) / 2)
# Plot scatterplot
d300.scatter = d300.mean %>%
ggplot(aes(x = log10(Mock_mean), y = log10(D300_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(d300.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d300.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d300.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d300.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("Mock normalized counts (log10)") +
ylab("D300 normalized counts (log10)")
d300.scatter
# Plot MA plot
d300.ma = d300.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(d300.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d300.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d300.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d300.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
ylim(-5, 5) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " D300/Mock)")))
d300.ma
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_300_a", "d1_300_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
# Get results (D300 / Mock)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4762 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 8, 0.17%
## LFC < 0 (down) : 7, 0.15%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d300.mock.res = results(ddsMat.subset, alpha = 0.05)
# View table
d300.mock.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_1000 3066.379 0.3295100 0.08449434 3.899787 9.627717e-05
## 2 oligo_1008 5925.682 -0.2551563 0.06265047 -4.072696 4.647206e-05
## 3 oligo_1274 2022.303 0.4862157 0.10492878 4.633769 3.590678e-06
## 4 oligo_1757 2257.253 0.3634583 0.09566470 3.799293 1.451093e-04
## 5 oligo_1803 1840.728 -0.4749228 0.11740305 -4.045234 5.227089e-05
## 6 oligo_1887 1127.674 -0.6098858 0.14386865 -4.239185 2.243327e-05
## 7 oligo_1899 2044.892 -1.0719983 0.11441785 -9.369152 7.311744e-21
## 8 oligo_199 1295.388 0.5784940 0.14433208 4.008076 6.121535e-05
## 9 oligo_2948 4159.132 -0.2676614 0.06809504 -3.930703 8.469790e-05
## 10 oligo_3105 1757.511 0.4309627 0.11078074 3.890231 1.001487e-04
## 11 oligo_3272 1798.766 0.4108322 0.10728245 3.829445 1.284325e-04
## 12 oligo_4129 4365.094 0.2947775 0.07047939 4.182463 2.883673e-05
## 13 oligo_4281 2355.140 -1.2394346 0.10062181 -12.317753 7.269179e-35
## 14 oligo_4488 1962.145 0.3928846 0.10224162 3.842707 1.216846e-04
## 15 oligo_4557 2244.526 -0.3820882 0.09905877 -3.857187 1.146996e-04
## padj
## 1 4.335530e-02
## 2 3.555914e-02
## 3 5.699603e-03
## 4 4.606735e-02
## 5 3.555914e-02
## 6 2.670681e-02
## 7 1.740926e-17
## 8 3.643844e-02
## 9 4.335530e-02
## 10 4.335530e-02
## 11 4.368539e-02
## 12 2.746411e-02
## 13 3.461583e-31
## 14 4.368539e-02
## 15 4.368539e-02
# Show quick plot of significance ()
DESeq2::plotMA(d300.mock.res)
# Get average between the replicates
d1000.mean = cd8.norm %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D1000_mean = rowMeans(select(., d1_1000_a, d1_1000_b))) %>%
mutate(M = log2(D1000_mean / Mock_mean),
A = (log2(D1000_mean) + log2(Mock_mean)) / 2)
# Plot scatterplot
d1000.scatter = d1000.mean %>%
ggplot(aes(x = log10(Mock_mean), y = log10(D1000_mean))) +
geom_point(colour = alpha("grey", 0.7)) +
geom_point(colour = alpha("red", 0.7), data = filter(d1000.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d1000.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d1000.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d1000.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
xlab("Mock normalized counts (log10)") +
ylab("D1000 normalized counts (log10)")
d1000.scatter
# Plot MA plot
d1000.ma = d1000.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(colour = alpha("red", 0.7), data = filter(d1000.mean, Id %in% c("oligo_4281", "oligo_1899"))) +
geom_point(colour = alpha("orange", 0.7), data = filter(d1000.mean, Id %in% c("oligo_1888", "oligo_4270"))) +
geom_point(colour = alpha("lightblue", 0.7), data = filter(d1000.mean, Id %in% c("oligo_913", "oligo_3295"))) +
geom_point(colour = alpha("darkgreen", 0.7), data = filter(d1000.mean, Id %in% c("oligo_1887", "oligo_4269"))) +
theme_light(base_size = 11) +
ylim(-5, 5) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " D1000/Mock)")))
d1000.ma
# Create new DESeq2 object with poor samples removed
ddsMat.subset = ddsMat.cd8[,which(colnames(assay(ddsMat.cd8)) %in% c("mock_a", "mock_b", "d1_1000_a", "d1_1000_b"))]
ddsMat.subset$Group = droplevels(ddsMat.subset$Group)
ddsMat.subset$Group = relevel(ddsMat.subset$Group, "Mock")
# Run stats
ddsMat.subset <- DESeq(ddsMat.subset)
## using pre-existing size factors
## estimating dispersions
## found already estimated dispersions, replacing these
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get results (D1000 / Mock)
results(ddsMat.subset, alpha = 0.05) %>%
summary()
##
## out of 4762 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 2, 0.042%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
# Get table of results
d1000.mock.res = results(ddsMat.subset, alpha = 0.05)
# View table
d1000.mock.res %>%
as.data.frame() %>%
rownames_to_column("Id") %>%
filter(padj < 0.05)
## Id baseMean log2FoldChange lfcSE stat pvalue
## 1 oligo_311 1883.486 -0.5499452 0.1252935 -4.389257 1.137384e-05
## 2 oligo_3425 2306.650 -0.5487480 0.1177323 -4.660981 3.147060e-06
## padj
## 1 0.02708112
## 2 0.01498630
# Show quick plot of significance ()
DESeq2::plotMA(d1000.mock.res)
# Scatter plot
d10.scatter + d100.scatter + d300.scatter + d1000.scatter
# MA plot
d10.ma + d100.ma + d300.ma + d1000.ma
# Rank
cd8.norm.long %>%
group_by(Id) %>%
summarise(sum = sum(norm.counts),
mean = mean(norm.counts),
median = median(norm.counts),
sd = sd(norm.counts),
geomean = gm_mean(norm.counts),
cv = sd(norm.counts) / mean(norm.counts)) %>%
mutate(rank = rank(mean)) %>%
ggplot(aes(x = rank, y = log10(mean))) +
geom_point() +
theme_minimal()
# Get mean counts per oligo
cd8.mean = cd8.norm.long %>%
group_by(Id) %>%
summarise(sum = sum(norm.counts),
mean = mean(norm.counts),
median = median(norm.counts),
sd = sd(norm.counts),
geomean = gm_mean(norm.counts),
cv = sd(norm.counts) / mean(norm.counts))
# Get quantiles
quantile(cd8.mean$median , seq(from = 0, to = 0.10, by = 0.005))
## 0% 0.5% 1% 1.5% 2% 2.5% 3%
## 0.000000 0.318799 1.062006 1.978373 9.955168 27.003683 74.638207
## 3.5% 4% 4.5% 5% 5.5% 6% 6.5%
## 142.062296 218.769373 319.850709 396.491177 445.067499 500.165253 551.465274
## 7% 7.5% 8% 8.5% 9% 9.5% 10%
## 592.921535 651.214172 689.947011 741.235201 767.365712 801.337192 831.117392
# Histogram with 5% cutoff
cd8.norm.long %>%
group_by(Id) %>%
summarise(total.count = sum(norm.counts),
mean.count = mean(norm.counts),
median = median(norm.counts),
geomean = gm_mean(norm.counts),
sd.count = sd(norm.counts)) %>%
ggplot(aes(x = log10(median))) +
geom_histogram(binwidth = 0.25, colour = "black", fill = "white") +
geom_density(alpha = 0.2, fill = "#FF6666") +
geom_vline(xintercept = log10(396.491177), color = "red") +
theme_light(base_size = 11) +
xlab("Normalized reads per oligo (log10)")
# Get ID's within the limits
## Cutoff = 5%
idx <- cd8.norm.long %>%
group_by(Id) %>%
summarise(sum = sum(norm.counts),
mean = mean(norm.counts),
median = median(norm.counts),
geomean = gm_mean(norm.counts),
sd = sd(norm.counts) ,
c.v = sd/mean) %>%
filter(median >= 396.491177) %>%
pull(Id)
# Filter table
cd8.norm.fil = cd8.norm %>%
filter(Id %in% idx)
# Write filter table to disk
write_tsv(cd8.norm, "tables/cd8-counts-norm-filtered.tsv")
# Get dimensions
dim(cd8.norm)
## [1] 4764 15
dim(cd8.norm.fil)
## [1] 4525 15
# Create long dataframe
cd8.norm.fil.long = cd8.norm.fil %>%
gather(sample, norm.counts, -Id)
# Plot boxplot
cd8.norm.fil.long %>%
ggplot(aes(x = sample, y = log10(norm.counts + 1))) +
geom_boxplot() +
theme_light(base_size = 11) +
theme(axis.text.x = element_text(angle = 90)) +
xlab("") +
ylab("Normalized counts (log10)") +
ggtitle('Median-normalized data w/ filtering')
# Histogram all
cd8.norm.fil.long %>%
ggplot(aes(x = log10(norm.counts))) +
geom_histogram(aes(y=..density..), binwidth=0.25, colour="black", fill="white") +
geom_density(alpha = 0.2, fill = "#FF6666") +
facet_wrap(sample~.) +
theme_light(base_size = 11) +
ylab("Density") +
xlab("(Filtered) Normalized reads per oligo (log10)")
# Get average between the replicates
id3.mean = cd8.norm.fil %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
CDK4_mean = rowMeans(select(., cdk453_a, cdk453_b)),
ID3_mean = rowMeans(select(., id3_a, id3_b))) %>%
mutate(M = log2(ID3_mean / CDK4_mean),
A = (log2(ID3_mean) + log2(CDK4_mean)) / 2)
# Plot MA plot
id3.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(size = 3.5, colour = rgb(237,157,148, max=255), data = filter(id3.mean, Id %in% c("oligo_4281", "oligo_1899"))) + #MART1-Ag
geom_point(size = 3.5, colour = rgb(199,150,149, max=255), data = filter(id3.mean, Id %in% c("oligo_913", "oligo_3295"))) + #MART1-W
geom_point(size = 3.5, colour = rgb(102,165,165, max=255), data = filter(id3.mean, Id %in% c("oligo_1887", "oligo_4269"))) + #CDK4-Ag
geom_point(size = 3.5, colour = rgb(88,121,163, max=255), data = filter(id3.mean, Id %in% c("oligo_1888", "oligo_4270"))) + #CDK4-WT
theme_light(base_size = 11) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab(expression(paste("Fold change ", '(', log[2], " 1D3/CDK4)")))
### Titration data
# 1:10
d10.mean = cd8.norm.fil %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D10_mean = rowMeans(select(., d1_10_a, d1_10_b))) %>%
mutate(M = log2(D10_mean / Mock_mean),
A = (log2(D10_mean) + log2(Mock_mean)) / 2)
d10.plot = d10.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(size = 4.5, colour = rgb(237,157,148, max=255), data = filter(d10.mean, Id %in% c("oligo_4281", "oligo_1899"))) + #MART1-Ag
geom_point(size = 4.5, colour = rgb(199,150,149, max=255), data = filter(d10.mean, Id %in% c("oligo_913", "oligo_3295"))) + #MART1-W
geom_point(size = 4.5, colour = rgb(102,165,165, max=255), data = filter(d10.mean, Id %in% c("oligo_1887", "oligo_4269"))) + #CDK4-Ag
geom_point(size = 4.5, colour = rgb(88,121,163, max=255), data = filter(d10.mean, Id %in% c("oligo_1888", "oligo_4270"))) + #CDK4-WT
theme_light(base_size = 11) +
ylim(-5, 1) +
xlim(7, 14) +
xlab("") +
ylab("")
d10.plot
# 1:100
d100.mean = cd8.norm.fil %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D100_mean = rowMeans(select(., d1_100_a, d1_100_b))) %>%
mutate(M = log2(D100_mean / Mock_mean),
A = (log2(D100_mean) + log2(Mock_mean)) / 2)
d100.plot = d100.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(size = 4.5, colour = rgb(237,157,148, max=255), data = filter(d100.mean, Id %in% c("oligo_4281", "oligo_1899"))) + #MART1-Ag
geom_point(size = 4.5, colour = rgb(199,150,149, max=255), data = filter(d100.mean, Id %in% c("oligo_913", "oligo_3295"))) + #MART1-W
geom_point(size = 4.5, colour = rgb(102,165,165, max=255), data = filter(d100.mean, Id %in% c("oligo_1887", "oligo_4269"))) + #CDK4-Ag
geom_point(size = 4.5, colour = rgb(88,121,163, max=255), data = filter(d100.mean, Id %in% c("oligo_1888", "oligo_4270"))) + #CDK4-WT
theme_light(base_size = 11) +
ylim(-5, 1) +
xlim(7, 14) +
xlab("") +
ylab("")
d100.plot
# 1:300
d300.mean = cd8.norm.fil %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D300_mean = rowMeans(select(., d1_300_a, d1_300_b))) %>%
mutate(M = log2(D300_mean / Mock_mean),
A = (log2(D300_mean) + log2(Mock_mean)) / 2)
d300.plot = d300.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(size = 4.5, colour = rgb(237,157,148, max=255), data = filter(d300.mean, Id %in% c("oligo_4281", "oligo_1899"))) + #MART1-Ag
geom_point(size = 4.5, colour = rgb(199,150,149, max=255), data = filter(d300.mean, Id %in% c("oligo_913", "oligo_3295"))) + #MART1-W
geom_point(size = 4.5, colour = rgb(102,165,165, max=255), data = filter(d300.mean, Id %in% c("oligo_1887", "oligo_4269"))) + #CDK4-Ag
geom_point(size = 4.5, colour = rgb(88,121,163, max=255), data = filter(d300.mean, Id %in% c("oligo_1888", "oligo_4270"))) + #CDK4-WT
theme_light(base_size = 11) +
ylim(-5, 1) +
xlim(7, 14) +
xlab("") +
ylab(expression(paste("Fold change ", '(', log[2], " D300/Mock)")))
d300.plot
# 1:1000
d1000.mean = cd8.norm.fil %>%
mutate(Mock_mean = rowMeans(select(., mock_a, mock_b)),
D1000_mean = rowMeans(select(., d1_1000_a, d1_1000_b))) %>%
mutate(M = log2(D1000_mean / Mock_mean),
A = (log2(D1000_mean) + log2(Mock_mean)) / 2)
d1000.plot = d1000.mean %>%
ggplot(aes(x = A, y = M)) +
geom_point(colour = alpha("grey", 0.5)) +
geom_point(size = 4.5, colour = rgb(237,157,148, max=255), data = filter(d1000.mean, Id %in% c("oligo_4281", "oligo_1899"))) + #MART1-Ag
geom_point(size = 4.5, colour = rgb(199,150,149, max=255), data = filter(d1000.mean, Id %in% c("oligo_913", "oligo_3295"))) + #MART1-W
geom_point(size = 4.5, colour = rgb(102,165,165, max=255), data = filter(d1000.mean, Id %in% c("oligo_1887", "oligo_4269"))) + #CDK4-Ag
geom_point(size = 4.5, colour = rgb(88,121,163, max=255), data = filter(d1000.mean, Id %in% c("oligo_1888", "oligo_4270"))) + #CDK4-WT
theme_light(base_size = 11) +
ylim(-5, 1) +
xlim(7, 14) +
xlab(expression(paste("Mean normalized counts ", '(', log[2], ")"))) +
ylab("")
d1000.plot
# Plot as single panel
d10.plot / d100.plot / d300.plot / d1000.plot
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggpubr_0.4.0 forcats_0.5.1
## [3] stringr_1.4.0 dplyr_1.0.5
## [5] purrr_0.3.4 readr_1.4.0
## [7] tidyr_1.1.3 tibble_3.1.0
## [9] ggplot2_3.3.3 tidyverse_1.3.0
## [11] readxl_1.3.1 DESeq2_1.30.1
## [13] SummarizedExperiment_1.20.0 Biobase_2.50.0
## [15] MatrixGenerics_1.2.1 matrixStats_0.58.0
## [17] GenomicRanges_1.42.0 GenomeInfoDb_1.26.4
## [19] IRanges_2.24.1 S4Vectors_0.28.1
## [21] BiocGenerics_0.36.0 ggsci_2.9
## [23] patchwork_1.1.1 knitr_1.31
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 fs_1.5.0 lubridate_1.7.10
## [4] bit64_4.0.5 RColorBrewer_1.1-2 httr_1.4.2
## [7] tools_4.0.3 backports_1.2.1 bslib_0.2.4
## [10] utf8_1.2.1 R6_2.5.0 DBI_1.1.1
## [13] colorspace_2.0-0 withr_2.4.1 tidyselect_1.1.0
## [16] curl_4.3 bit_4.0.4 compiler_4.0.3
## [19] cli_2.3.1 rvest_1.0.0 xml2_1.3.2
## [22] DelayedArray_0.16.3 labeling_0.4.2 sass_0.3.1
## [25] scales_1.1.1 genefilter_1.72.1 digest_0.6.27
## [28] foreign_0.8-80 rmarkdown_2.7 rio_0.5.26
## [31] XVector_0.30.0 pkgconfig_2.0.3 htmltools_0.5.1.1
## [34] highr_0.8 dbplyr_2.1.0 fastmap_1.1.0
## [37] rlang_0.4.10 rstudioapi_0.13 RSQLite_2.2.5
## [40] farver_2.1.0 jquerylib_0.1.3 generics_0.1.0
## [43] jsonlite_1.7.2 BiocParallel_1.24.1 zip_2.1.1
## [46] car_3.0-10 RCurl_1.98-1.3 magrittr_2.0.1
## [49] GenomeInfoDbData_1.2.4 Matrix_1.2-18 Rcpp_1.0.6
## [52] munsell_0.5.0 fansi_0.4.2 abind_1.4-5
## [55] lifecycle_1.0.0 stringi_1.5.3 yaml_2.2.1
## [58] carData_3.0-4 zlibbioc_1.36.0 grid_4.0.3
## [61] blob_1.2.1 crayon_1.4.1 lattice_0.20-41
## [64] haven_2.3.1 splines_4.0.3 annotate_1.68.0
## [67] hms_1.0.0 locfit_1.5-9.4 pillar_1.5.1
## [70] ggsignif_0.6.1 geneplotter_1.68.0 reprex_1.0.0
## [73] XML_3.99-0.6 glue_1.4.2 evaluate_0.14
## [76] data.table_1.14.0 modelr_0.1.8 vctrs_0.3.7
## [79] cellranger_1.1.0 gtable_0.3.0 assertthat_0.2.1
## [82] cachem_1.0.4 openxlsx_4.2.3 xfun_0.22
## [85] xtable_1.8-4 broom_0.7.5 rstatix_0.7.0
## [88] survival_3.2-7 AnnotationDbi_1.52.0 memoise_2.0.0
## [91] ellipsis_0.3.1